### Load standardpackages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 ect.
library(magrittr) # For extra-piping operators (eg. %<>%)
library(tidytext)
This session
Refresher:

Bag of words model
- In order for a computer to understand text we need to somehow find a useful representation.
- If you need to compare different texts e.g. articles, you will probably go for keywords. These keywords may come from a keyword-list with for example 200 different keywords
- In that case you could represent each document with a (sparse) vector with 1 for “keyword present” and 0 for “keyword absent”
- We can also get a bit more sophisticated and count the number of times a word from our dictionary occurs.
- For a corpus of documents that would give us a document-term matrix.
Let’s try creating a bag of words model from our initial example.
text <- tibble(id = c(1:6),
text = c('A text about cats.',
'A text about dogs.',
'And another text about a dog.',
'Why always writing about cats and dogs, always dogs?',
'There are too little text about cats but to many about dogs',
'Cats, cats, cats! I love cats soo much. Cats are way better than dogs'))
text_tidy <- text %>%
unnest_tokens(word, text, token = 'words') %>%
count(id, word)
The document-term matrix (DTM)
- The simplest form of vector representation of text is a document-term matrix
- How do we get a document-term matrix now?
- We could do it by hand, with well-known
dplyr syntax (Note: only works when you have one row per unique document-word pair)
text_tidy %>%
pivot_wider(names_from = word, values_from = n, values_fill = 0)
- We could also use
cast_dtm() to create a DTM in the format of the tm package.
text_dtm <- text_tidy %>%
cast_dtm(id, word, n)
text_dtm
<<DocumentTermMatrix (documents: 6, terms: 25)>>
Non-/sparse entries: 42/108
Sparsity : 72%
Maximal term length: 7
Weighting : term frequency (tf)
- We can simply convert it to a tibble. Since there exists no direct transfer function, we have to first transform it to a matrix.
- Notice how we recover the rownames
text_dtm %>% as.matrix() %>% as_tibble(rownames = 'id')
- Sidenote: We can also tidy the DTM again to a tidy token-dataframe.
text_dtm %>% tidy()
- We also can directly use a similar function to cast a sparse matrix (which we for sure then also could transform to a tibble again)
text_tidy %>% cast_sparse(row = id, column = word, value = n)
6 x 25 sparse Matrix of class "dgCMatrix"
1 1 1 1 1 . . . . . . . . . . . . . . . . . . . . .
2 1 1 . 1 1 . . . . . . . . . . . . . . . . . . . .
3 1 1 . 1 . 1 1 1 . . . . . . . . . . . . . . . . .
4 . 1 1 . 2 1 . . 2 1 1 . . . . . . . . . . . . . .
5 . 2 1 1 1 . . . . . . 1 1 1 1 1 1 1 . . . . . . .
6 . . 5 . 1 . . . . . . 1 . . . . . . 1 1 1 1 1 1 1
- Finally, we could just apply a text recipe here
library(recipes)
library(textrecipes)
text %>%
recipe(~.) %>%
step_tokenize(text, token = 'words') %>% # tokenize
step_tf(text) %>% # TFIDF weighting
prep() %>% juice()
TF-IDF - Term Frequency - Inverse Document Frequency
- A token is important for a document if it appears very often
- A token becomes less important for comparison across a corpus if it appears all over the place in the corpus
- Cat in a corpus of websites talking about cats is not that important
\[w_{i,j} = tf_{i,j}*log(\frac{N}{df_i})\]
- \(w_{i,j}\) = the TF-IDF score for a term i in a document j
- \(tf_{i,j}\) = number of occurrences of term i in document j
- \(N\) = number of documents in the corpus
- \(df_i\) = number of documents with term i
# TFIDF weights
text_tidy %<>%
bind_tf_idf(term = word,
document = id,
n = n)
- We obviously could also cast a tf_idf weighted dtm…
text_tidy %>%
select(id, word, tf_idf) %>%
pivot_wider(names_from = word, values_from = tf_idf, values_fill = 0)
- btw: this is equivalent to just running a textrecipe like that:
text %>%
recipe(~.) %>%
step_tokenize(text, token = 'words') %>% # tokenize
step_tfidf(text) %>% # TFIDF weighting
prep() %>% juice()
- A last reminder on the powerful
pairwise_xx() functions from the widyr package
- For instance, pairwise distances or similarities between documents based on their tf-idf vectors
library(widyr)
text_tidy %>% pairwise_dist(id, word, tf_idf, method = "manhattan") %>%
mutate(similarity = 1 - (distance / max(distance)) ) %>%
select(-distance) %>%
arrange(desc(similarity))
Dimensionality reduction techniques
rm(list=ls())
- Ok, let's first get some more interesting text data to work with
text %<>%
rename(id = X1) %>%
filter(language == 'en')
# preprocessing
text_tidy %<>%
#mutate(word = word %>% str_remove_all('[^[:alnum:]]')) %>% ## remove all special characters
filter(str_length(word) > 2 ) %>% # Remove words with less than 3 characters
group_by(word) %>%
filter(n() > 100) %>% # remove words occuring less than 100 times
ungroup() %>%
anti_join(stop_words, by = 'word') # remove stopwords
PCA
text_pca <- text_dtm %>%
column_to_rownames('id') %>%
prcomp(center = TRUE, scale. = TRUE)
text_pca
Standard deviations (1, .., p=6):
[1] 3.207823e+00 2.759080e+00 2.234460e+00 1.388305e+00 4.208882e-01 6.333504e-16
Rotation (n x k) = (25 x 6):
PC1 PC2 PC3 PC4 PC5 PC6
a -0.12298966 -0.20293146 0.3096529650 -0.16145250 -0.10294565 -0.242719650
about -0.27381282 0.16761993 -0.0523034736 0.01945303 0.03687218 0.621255802
cats 0.29282077 0.10623353 -0.0259909788 0.02013339 0.39484526 -0.139416189
text -0.22594973 0.04505067 0.2965162521 -0.09825013 -0.09569899 -0.609033136
dogs 0.05438080 0.04321488 -0.4058754433 -0.06481897 -0.83922505 -0.056366491
and -0.07300087 -0.21088577 -0.1718210259 0.48798584 0.03698842 -0.088662025
another -0.07475529 -0.14019613 0.1983802046 0.55517809 -0.11432042 0.019762232
dog -0.07475529 -0.14019613 0.1983802046 0.55517809 -0.11432042 0.019762232
always -0.01758432 -0.12655562 -0.4157185213 0.06208059 0.16110748 -0.177054662
why -0.01758432 -0.12655562 -0.4157185213 0.06208059 0.16110748 -0.177054662
writing -0.01758432 -0.12655562 -0.4157185213 0.06208059 0.16110748 -0.177054662
are 0.14435186 0.31529231 0.0002177812 0.12216722 -0.01817629 -0.173880335
but -0.12079837 0.32924621 -0.0403763595 0.09233359 0.01706538 -0.068982991
little -0.12079837 0.32924621 -0.0403763595 0.09233359 0.01706538 -0.068982991
many -0.12079837 0.32924621 -0.0403763595 0.09233359 0.01706538 -0.068982991
there -0.12079837 0.32924621 -0.0403763595 0.09233359 0.01706538 -0.068982991
to -0.12079837 0.32924621 -0.0403763595 0.09233359 0.01706538 -0.068982991
too -0.12079837 0.32924621 -0.0403763595 0.09233359 0.01706538 -0.068982991
better 0.30339063 0.06957053 0.0406518334 0.06219708 -0.04005677 0.004133281
i 0.30339063 0.06957053 0.0406518334 0.06219708 -0.04005677 0.004133281
love 0.30339063 0.06957053 0.0406518334 0.06219708 -0.04005677 0.004133281
much 0.30339063 0.06957053 0.0406518334 0.06219708 -0.04005677 0.004133281
soo 0.30339063 0.06957053 0.0406518334 0.06219708 -0.04005677 0.004133281
than 0.30339063 0.06957053 0.0406518334 0.06219708 -0.04005677 0.004133281
way 0.30339063 0.06957053 0.0406518334 0.06219708 -0.04005677 0.004133281
text_pca[['x']]
PC1 PC2 PC3 PC4 PC5 PC6
1 -0.9053472 -1.026254 1.3687094 -1.4697533 0.659153358 4.284767e-16
2 -0.9903765 -1.025903 0.8434948 -1.5666736 -0.667757859 1.538700e-15
3 -1.5702074 -2.178507 2.0217993 2.1842199 -0.041338206 -2.376571e-16
4 -0.3693521 -1.966548 -4.2368110 0.2442417 0.058256383 -5.152129e-16
5 -2.5373253 5.116156 -0.4114972 0.3632652 0.006170831 -1.451964e-15
6 6.3726085 1.081056 0.4143047 0.2447001 -0.014484506 -7.667478e-16
- Again, alternatively with a recipe…
text_pca <- text %>%
recipe(~.) %>%
update_role(id, new_role = "id") %>%
step_tokenize(text, token = 'words') %>% # tokenize
step_tfidf(text, prefix = NULL) %>% # TFIDF weighting
step_pca(all_predictors(), num_comp = 3) %>% # PCA
prep()
text_pca %>% juice()
text_pca %>%
tidy(3) %>%
filter(component %in% paste0("PC", 1:3)) %>%
mutate(component = fct_inorder(component)) %>%
ggplot(aes(value, terms, fill = terms)) +
geom_col(show.legend = FALSE) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL)
library(embed)
text_UMAP <- text %>%
recipe(~.) %>%
update_role(id, new_role = "id") %>%
step_tokenize(text, token = 'words') %>% # tokenize
step_tfidf(text, prefix = NULL) %>% # TFIDF weighting
step_umap(all_predictors(), n_neighbors = 2) %>%
prep()
Error in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components, :
n_neighbors must be smaller than the dataset size
Embeddings (Bonus)
glove6b <- embedding_glove42b(dimensions =100)
Error in embedding_glove42b(dimensions = 100) :
unused argument (dimensions = 100)
Summary
---
title: '(Somewhat) advanced NLP: text vectorization'
author: "Daniel S. Hain (dsh@business.aau.dk)"
date: "Updated `r format(Sys.time(), '%B %d, %Y')`"
output:
  html_notebook:
    code_folding: show
    df_print: paged
    toc: true
    toc_depth: 2
    toc_float:
      collapsed: false
    theme: flatly
---

```{r setup, include=FALSE}
### Generic preamble
# NOTE(review): dropped `rm(list = ls())` — knitting always starts from a fresh
# R session, and wiping the global environment is a script anti-pattern (it
# also deletes loaded helpers and settings when run interactively).
Sys.setenv(LANG = "en") # Force English locale for messages
options(scipen = 5) # Discourage scientific number notation in printed output

### Knitr options
library(knitr) # For display of the markdown
knitr::opts_chunk$set(warning = FALSE,
                      message = FALSE,
                      # NOTE(review): knitr documents a string or NA for `comment`
                      # (e.g. comment = "") — confirm FALSE behaves as intended.
                      comment = FALSE,
                      fig.align = "center"
                      )
```

```{r}
### Load standardpackages
library(tidyverse) # Collection of all the good stuff like dplyr, ggplot2 ect.
library(magrittr) # For extra-piping operators (eg. %<>%)
```

```{r}
library(tidytext)
```

# This session


# Refresher:

![](https://sds-aau.github.io/SDS-master/00_media/nlp_tidyworkflow.png)


# Bag of words model

* In order for a computer to understand text we need to somehow find a useful representation.
* If you need to compare different texts e.g. articles, you will probably go for keywords. These keywords may come from a keyword-list with for example 200 different keywords
* In that case you could represent each document with a (sparse) vector with 1 for "keyword present" and 0 for "keyword absent"
* We can also get a bit more sophisticated and count the number of times a word from our dictionary occurs.
* For a corpus of documents that would give us a document-term matrix.

![example](https://i.stack.imgur.com/C1UMs.png)

Let's try creating a bag of words model from our initial example.

```{r}
# Toy corpus: six short documents about cats and dogs, one row per document.
# (1:6 is already a vector — wrapping it in c() was redundant.)
text <- tibble(id = 1:6,
               text = c('A text about cats.',
                        'A text about dogs.',
                        'And another text about a dog.',
                        'Why always writing about cats and dogs, always dogs?',
                        'There are too little text about cats but to many about dogs',
                        'Cats, cats, cats! I love cats soo much. Cats are way better than dogs'))
```

```{r}
# Tidy one-token-per-row representation: tokenize each document into words,
# then count occurrences per document -> columns id, word, n.
text_tidy <- text %>% 
  unnest_tokens(word, text, token = 'words') %>% 
  count(id, word)
```


## The document-term matrix (DTM)

* The simplest form of vector representation of text is a document-term matrix
* How do we get a document-term matrix now?
* We could do it by hand, with well-known `dplyr` syntax (Note: only works when you have one row per unique document-word pair)

```{r}
# Hand-rolled document-term matrix: one row per document, one column per word,
# counts as values; words absent from a document become 0 via values_fill.
text_tidy %>%
  pivot_wider(names_from = word, values_from = n, values_fill = 0)
```

* We could also use `cast_dtm()` to create a DTM in the format of the `tm` package.

```{r}
# Cast the tidy counts into a tm-style DocumentTermMatrix
# (rows = documents, columns = terms, values = counts n).
text_dtm <- text_tidy %>%
  cast_dtm(id, word, n)
```

```{r}
# Printing a DocumentTermMatrix shows summary info (dimensions, sparsity, weighting).
text_dtm 
```

* We can simply convert it to a tibble. Since there exists no direct transfer function, we have to first transform it to a matrix.
* Notice how we recover the rownames

```{r}
# DTM -> base matrix -> tibble; rownames = 'id' recovers the document ids as a column.
text_dtm %>% as.matrix() %>% as_tibble(rownames = 'id') 
```

* Sidenote: We can also tidy the DTM again to a tidy token-dataframe.

```{r}
# tidytext::tidy() converts the DTM back to a tidy (document, term, count) dataframe.
text_dtm %>% tidy()
```
* We also can directly use a similar function to cast a sparse matrix (which we for sure then also could transform to a tibble again)

```{r}
# Cast directly to a sparse dgCMatrix (Matrix package) instead of a tm DTM.
text_tidy %>% cast_sparse(row = id, column = word, value = n)
```

* Finally, we could just apply a text recipe here

```{r}
library(recipes)
library(textrecipes)
```

```{r}
# Same bag-of-words DTM built via a textrecipes pipeline.
text %>%
  recipe(~.) %>% 
  step_tokenize(text, token = 'words') %>% # tokenize
  step_tf(text) %>% # term-frequency (raw count) weighting — step_tf() is NOT tf-idf
  prep() %>% juice()
```


## TF-IDF - Term Frequency - Inverse Document Frequency

* A token is important for a document if it appears very often
* A token becomes less important for comparison across a corpus if it appears all over the place in the corpus
* *Cat* in a corpus of websites talking about cats is not that important

$$w_{i,j} = tf_{i,j}*log(\frac{N}{df_i})$$

- $w_{i,j}$ = the TF-IDF score for a term i in a document j
- $tf_{i,j}$ = number of occurrences of term i in document j
- $N$ = number of documents in the corpus
- $df_i$ = number of documents with term i

```{r}
# TFIDF weights
# Adds tf, idf and tf_idf columns to the tidy counts
# (term = token column, document = document id, n = per-document count).
text_tidy %<>%
  bind_tf_idf(term = word,
              document = id,
              n = n)
```

* We obviously could also cast a tf_idf weighted dtm...

```{r}
# Wide document-term matrix weighted by tf-idf instead of raw counts.
text_tidy %>%
  select(id, word, tf_idf) %>%
  pivot_wider(names_from = word, values_from = tf_idf, values_fill = 0)
```

* btw: this is equivalent to just running a textrecipe like that:

```{r}
# Equivalent tf-idf-weighted DTM via textrecipes.
text %>%
  recipe(~.) %>% 
  step_tokenize(text, token = 'words') %>% # tokenize
  step_tfidf(text) %>% # TFIDF weighting
  prep() %>% juice()
```

* A last reminder on the powerful `pairwise_xx()` functions from the `widyr` package
* For instance, pairwise distances or similarities between documents based on their tf-idf vectors

```{r}
library(widyr)
```

```{r}
# Pairwise Manhattan distance between documents in tf-idf space,
# rescaled to a [0, 1] similarity (1 = most similar pair, 0 = most distant pair).
text_tidy %>% pairwise_dist(id, word, tf_idf, method = "manhattan") %>%
  mutate(similarity = 1 - (distance / max(distance)) ) %>%
  select(-distance) %>%
  arrange(desc(similarity))
```



# Dimensionality reduction techniques

```{r}
# Clear only the toy-example objects from the section above;
# `rm(list = ls())` is an anti-pattern — it wipes the whole environment,
# including loaded helpers and settings.
rm(text, text_tidy, text_dtm)
```

* Ok, let's first get some more interesting text data to work with

```{r}
# Load the CORDIS H2020 project-report corpus from GitHub (network required).
# NOTE(review): column names depend on the readr version — older read_csv()
# called the unnamed first column `X1`, readr >= 2.0 calls it `...1`; the
# rename() in the next chunk assumes `X1`. Confirm against the readr in use.
text <- read_csv('https://github.com/SDS-AAU/SDS-master/raw/master/M2/data/cordis-h2020reports.gz')
```

```{r}
# Use the (formerly unnamed) first column as document id and keep only
# English-language reports.
# NOTE(review): assumes read_csv() named the first column `X1` — with
# readr >= 2.0 it would be `...1`; verify before running.
text %<>%
  rename(id = X1) %>%
  filter(language == 'en')
```



```{r}
# preprocessing
text_tidy %<>%
  #mutate(word = word %>% str_remove_all('[^[:alnum:]]')) %>% ## remove all special characters
  filter(str_length(word) > 2 ) %>% # Remove words with less than  3 characters
  group_by(word) %>%
  filter(n() > 100) %>% # remove words occuring less than 100 times
  ungroup() %>%
  anti_join(stop_words, by = 'word') # remove stopwords
```






```{r, include=FALSE}
# Hidden helper chunk: build a wide document-term count table from the raw
# corpus. NOTE(review): this uses `text` directly, so — unlike `text_tidy`
# above — it keeps stopwords, rare words and short tokens; confirm that is
# intended before interpreting the PCA below.
text_dtm <- text %>%
  unnest_tokens(word, text, token = 'words') %>% 
  count(id, word) %>%
  pivot_wider(names_from = word, values_from = n, values_fill = 0)
```



## PCA

```{r}
# PCA on the standardized document-term counts (documents as rows).
# NOTE(review): prcomp(scale. = TRUE) errors on zero-variance columns —
# confirm no term has a constant count across all documents.
text_pca <- text_dtm %>% 
  column_to_rownames('id') %>% 
  prcomp(center = TRUE, scale. = TRUE)
```

```{r}
# Overview of the prcomp object: sdev, rotation (loadings), x (scores), etc.
text_pca %>% glimpse()
```

```{r}
# Document scores: coordinates of each document in principal-component space.
text_pca[['x']]
```

* Again, alternatively with a recipe...

```{r}
# Same idea as a recipes pipeline: tokenize -> tf-idf -> PCA (3 components).
text_pca <- text %>%
  recipe(~.) %>% 
  update_role(id, new_role = "id") %>%
  step_tokenize(text, token = 'words') %>% # tokenize
  step_tfidf(text, prefix = NULL) %>% # TFIDF weighting; prefix = NULL presumably drops the "tfidf_" column prefix — confirm against textrecipes docs
  step_pca(all_predictors(), num_comp = 3) %>% # PCA
  prep() 
```

```{r}
# Extract the processed data (PC scores per document) from the prepped recipe.
text_pca %>% juice()
```

```{r}
# Loadings plot: tidy(3) pulls the third step's (step_pca) term loadings,
# then plot each term's contribution to PC1-PC3, one facet per component.
text_pca %>%
  tidy(3) %>%
  filter(component %in% paste0("PC", 1:3)) %>%
  mutate(component = fct_inorder(component)) %>%
  ggplot(aes(value, terms, fill = terms)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~component, nrow = 1) +
  labs(y = NULL)
```


```{r}
library(embed)
```

```{r}
# UMAP embedding of the tf-idf vectors via embed::step_umap().
# NOTE(review): the rendered output above shows this chunk failing with
# "n_neighbors must be smaller than the dataset size" — n_neighbors = 2
# requires more rows than the data apparently had at run time; verify the
# corpus size before relying on this chunk.
text_UMAP <- text %>%
  recipe(~.) %>% 
  update_role(id, new_role = "id") %>%
  step_tokenize(text, token = 'words') %>% # tokenize
  step_tfidf(text, prefix = NULL) %>% # TFIDF weighting
  step_umap(all_predictors(), n_neighbors = 2) %>%
  prep() 
```







## Topic Models: LDA




```{r}
#UMAP
```



# Embeddings (Bonus)

```{r}
# Download pretrained GloVe word embeddings (27B-token Twitter set, 100 dims).
# NOTE(review): the object is named `glove6b` but embedding_glove27b() loads
# the 27B Twitter embeddings — the name is misleading. The rendered output
# above also shows an earlier attempt, embedding_glove42b(dimensions = 100),
# failing: per textdata, the 42B set ships only 300-dim vectors and takes no
# `dimensions` argument.
library(textdata)

glove6b <- embedding_glove27b(dimensions = 100)
glove6b
# These embeddings can now be loaded with step_wordembeddings
```




# Summary






